library(tidyverse)

#### FUNCTIONS ####

# Map a vector of country names onto the short country codes used in this
# project. Names that are not in the table (including NA) come back as NA.
country_to_code <- function(x) {
  code_lookup <- c(
    "Australia"    = "AUS",
    "Ghana"        = "GHA",
    "Brazil"       = "BR",
    "Canada"       = "CAN",
    "Chile"        = "CHL",
    "China"        = "CHN",
    "Colombia"     = "COL",
    "France"       = "FR",
    "Italy"        = "IT",
    "Spain"        = "SP",
    "UK"           = "UK",
    "US"           = "US",
    "Uganda"       = "UGA",
    "India"        = "IND",
    "Japan"        = "JPN",
    "South Africa" = "ZAF"
  )

  # match() gives the table position of each name (NA when absent); indexing
  # with NA yields NA, and unname() strips the country names from the result.
  unname(code_lookup[match(x, names(code_lookup))])
}

# Convert the named attribute columns of `data` to factors and move the
# requested reference level to the front of each factor's levels.
#
# Arguments:
#   data       - data frame holding one column per name in `ref_levels`.
#   ref_levels - named character vector: names are column names, values are
#                the level to use as the reference for that column.
#
# Returns `data` with those columns replaced by re-levelled factors.
set_refs <- function(data,
                     ref_levels = c(gdp = "0%",
                                    jobs = "0%",
                                    supplies = "Quick to procure",
                                    deaths = "10 per million",
                                    vaccinated = "5%",
                                    lockdown = "10 weeks")) {

  attribute_names <- names(ref_levels)

  for (attribute in attribute_names) {
    baseline <- ref_levels[[attribute]]
    as_categories <- as.factor(data[[attribute]])
    data[[attribute]] <- relevel(as_categories, ref = baseline)
  }

  data
}

# Pull the latest survey exports from a local git checkout and copy them into
# the `survey_data` directory, overwriting what is there.
#
# Arguments:
#   filepath      - path of the git working copy that holds the data_*.csv
#                   files (a trailing "/" is added when missing).
#   countries     - country codes; each contributes "data_<code>.csv".
#   incl_combined - also copy "data_combined.csv" when TRUE.
#
# Asks for confirmation on the console first. Answering "no", or anything
# other than "yes", stops with an error, as before.
#
# Returns the logical vector produced by file.copy() (one entry per file).
refresh_data <- function(filepath, countries, incl_combined = TRUE) {

  if (!str_ends(filepath, "/")) {
    filepath <- paste0(filepath, "/")
  }

  # vapply() always yields a character vector, even for empty `countries`,
  # where sapply() would have returned a list.
  country_files <- vapply(
    countries,
    function(x) paste0(filepath, "data_", x, ".csv"),
    character(1)
  )

  if (incl_combined) {
    country_files <- c(country_files, paste0(filepath, "data_combined.csv"))
  }

  usr_confirm <- str_to_lower(
    trimws(readline("Confirm you are happy to overwrite files (yes/no):  "))
  )

  if (usr_confirm == "yes") {
    # `git -C <dir>` runs the pull inside the checkout without a shell `cd`.
    # The previous `cd "<dir>"; git pull` string would still pull in the
    # current directory when the cd failed, and spliced the path unescaped
    # into a shell command.
    pull_status <- system2("git", c("-C", shQuote(filepath), "pull"))
    if (!identical(pull_status, 0L)) {
      warning("`git pull` in ", filepath, " exited with status ", pull_status,
              "; copying the files currently on disk.", call. = FALSE)
    }

    copied <- file.copy(from = country_files, to = "survey_data", overwrite = TRUE)
    if (!all(copied)) {
      warning("Could not copy: ",
              paste(country_files[!copied], collapse = ", "), call. = FALSE)
    }
    copied
  } else if (usr_confirm == "no") {
    stop("Exited without copying")
  } else {
    stop("Invalid response: must be 'yes' or 'no' ... exited.")
  }
}

# Reshape the wide conjoint answers (columns person1_* ... person8_*) into a
# long data frame with one row per respondent and round, then split the
# "|"-separated profile text into one column per attribute.
#
# Arguments:
#   data - survey data with an `id` column and, for each round i in 1..8,
#          the columns person<i>_ans1, person<i>_ans2 and person<i>_a.
#
# Returns a data frame with columns round, id, choice_bin, choice_cont, gdp,
# jobs, supplies, lockdown, deaths and vaccinated.
convert_conjoint <- function(data) {
  # Only the conjoint columns and the respondent identifier are needed.
  wide_answers <- select(data, starts_with("person"), id)

  # Long-format slice for a single round; columns are looked up by exact name.
  extract_round <- function(round_no) {
    prefix <- paste0("person", round_no)
    data.frame(
      round = round_no,
      id = wide_answers[["id"]],
      choice_bin = wide_answers[[paste0(prefix, "_ans1")]],
      choice_cont = wide_answers[[paste0(prefix, "_ans2")]],
      profile = wide_answers[[paste0(prefix, "_a")]]
    )
  }

  per_round <- lapply(1:8, extract_round)
  stacked <- do.call("rbind", per_round)

  separate(
    stacked,
    profile,
    c("gdp", "jobs", "supplies", "lockdown", "deaths", "vaccinated"),
    sep = "\\|"
  )
}

# Translate the conjoint attribute text of one country into English labels
# and recode the binary choice to 1/0.
#
# Arguments:
#   data    - data frame with the columns gdp, jobs, supplies, lockdown,
#             deaths, vaccinated and choice_bin (assumed to hold text; these
#             are the columns read and overwritten below).
#   country - a single country code. Only "CHN", "BR", "CHL", "COL", "SP",
#             "FR", "CAN", "IT", "JPN" and "UGA" have a translation branch;
#             any other code leaves the six attribute columns untouched.
#
# Returns `data` with the attribute columns recoded and choice_bin replaced.
#
# Matching notes:
#   * Each case_when() yields the label of the first pattern that matches, so
#     order matters where one pattern is a substring of another (the "5% ..."
#     vaccination pattern is always listed after the 15%/25%/75% ones).
#   * Where a case_when() has no `TRUE ~` fallback, values matching no pattern
#     become NA. The FR/CAN branch and the UGA gdp recode keep the original
#     text for unmatched values instead.
#   * Patterns are regular expressions unless wrapped in coll().
translate_conjoint <- function(data, country) {
  
  if (country == "CHN") {
    
    # GDP
    data$gdp <- case_when(str_detect(data$gdp, "'-国内生产总值减少10%") ~ "'-10% decrease GDP",
                          str_detect(data$gdp, "国内生产总值下降-5%") ~ "'-5% decrease GDP",
                          str_detect(data$gdp, "0%的变化") ~ "0% change",
                          str_detect(data$gdp, "国内生产总值增长5%") ~ "5% increase GDP",
                          str_detect(data$gdp, "国内生产总值增加10%") ~ "10% increase GDP")
    
    data$jobs <- case_when(str_detect(data$jobs, "-10%下降的工作") ~ "-10% decline jobs",
                           str_detect(data$jobs, "-5%下降的工作") ~ "-5% decline jobs",
                           str_detect(data$jobs, "0%的变化") ~ "0% change",
                           str_detect(data$jobs, "增加5%的就业机会") ~ "5% increase jobs",
                           str_detect(data$jobs, "增加10%的就业机会") ~ "10% increase jobs")
    
    data$supplies <- case_when(str_detect(data$supplies, "政府迅速获得所有必要的COVID-19疫苗用品") ~ "Government quickly obtained all necessary COVID-19 Vaccine Supplies",
                               str_detect(data$supplies, "政府在获得必要的COVID-19疫苗供应方面进展缓慢") ~ "Government was slow in obtaining necessary COVID-19 Vaccine Supplies")
    
    data$lockdown <- case_when(str_detect(data$lockdown, "10周禁闭") ~ "10 weeks lockdown",
                               str_detect(data$lockdown, "20周禁闭") ~ "20 weeks lockdown",
                               str_detect(data$lockdown, "禁闭30周") ~ "30 weeks lockdown",
                               str_detect(data$lockdown, "40周禁闭") ~ "40 weeks lockdown")
    
    data$deaths <- case_when(str_detect(data$deaths, "每百万人中有10人死亡") ~ "10 deaths per million people",
                             str_detect(data$deaths, "每百万人中有30人死亡") ~ "30 deaths per million people",
                             str_detect(data$deaths, "每百万人中有50人死亡") ~ "50 deaths per million people",
                             str_detect(data$deaths, "每百万人中有70人死亡") ~ "70 deaths per million people",
                             str_detect(data$deaths, "每百万人中有90人死亡") ~ "90 deaths per million people")
    
    data$vaccinated <- case_when(str_detect(data$vaccinated, "75%的人接种了疫苗") ~ "75% vaccinated",
                                 str_detect(data$vaccinated, "50%接种了疫苗") ~ "50% vaccinated",
                                 str_detect(data$vaccinated, "25%接种了疫苗") ~ "25% vaccinated",
                                 str_detect(data$vaccinated, "15%接种了疫苗") ~ "15% vaccinated",
                                 str_detect(data$vaccinated, "5%接种了疫苗") ~ "5% vaccinated")
    
    
  } else if (country == "BR") {
    
    # GDP
    data$gdp <- case_when(str_detect(data$gdp, "'-10% de redução do PIB") ~ "'-10% decrease GDP",
                          str_detect(data$gdp, "'-5% de redução do PIB") ~ "'-5% decrease GDP",
                          str_detect(data$gdp, "0% de mudança") ~ "0% change",
                          str_detect(data$gdp, "5% de aumento do PIB") ~ "5% increase GDP",
                          str_detect(data$gdp, "10% de aumento do PIB") ~ "10% increase GDP")
    
    # Two wordings are accepted for the -5% level. The last rule maps a GDP
    # label found in the jobs column to NA explicitly.
    # NOTE(review): presumably a mis-labelled level in the survey export - confirm.
    data$jobs <- case_when(str_detect(data$jobs, "-10% de redução de empregos") ~ "-10% decline jobs",
                           str_detect(data$jobs, "-5% de empregos em declínio") | str_detect(data$jobs, "-5% de redução de empregos")~ "-5% decline jobs",
                           str_detect(data$jobs, "0% de mudança") ~ "0% change",
                           str_detect(data$jobs, "5% de aumento de empregos") ~ "5% increase jobs",
                           str_detect(data$jobs, "10% de aumento de empregos") ~ "10% increase jobs",
                           str_detect(data$jobs, "10% de aumento do PIB") ~ NA_character_)
    
    data$supplies <- case_when(str_detect(data$supplies, "O governo obteve rapidamente todos os suprimentos necessários para a Vacina COVID-19") ~ "Government quickly obtained all necessary COVID-19 Vaccine Supplies",
                               str_detect(data$supplies, "O governo foi lento na obtenção dos suprimentos necessários para a vacina COVID-19") ~ "Government was slow in obtaining necessary COVID-19 Vaccine Supplies")
    
    data$lockdown <- case_when(str_detect(data$lockdown, "10 semanas de bloqueio") ~ "10 weeks lockdown",
                               str_detect(data$lockdown, "20 semanas de bloqueio") ~ "20 weeks lockdown",
                               str_detect(data$lockdown, "30 semanas de bloqueio") ~ "30 weeks lockdown",
                               str_detect(data$lockdown, "40 semanas de bloqueio") ~ "40 weeks lockdown")
    
    data$deaths <- case_when(str_detect(data$deaths, "10 mortes por milhão de pessoas") ~ "10 deaths per million people",
                             str_detect(data$deaths, "30 mortes por milhão de pessoas") ~ "30 deaths per million people",
                             str_detect(data$deaths, "50 mortes por milhão de pessoas") ~ "50 deaths per million people",
                             str_detect(data$deaths, "70 mortes por milhão de pessoas") ~ "70 deaths per million people",
                             str_detect(data$deaths, "90 mortes por milhão de pessoas") ~ "90 deaths per million people")
    
    data$vaccinated <- case_when(str_detect(data$vaccinated, "75% vacinados") ~ "75% vaccinated",
                                 str_detect(data$vaccinated, "50% vacinados") ~ "50% vaccinated",
                                 str_detect(data$vaccinated, "25% vacinados") ~ "25% vaccinated",
                                 str_detect(data$vaccinated, "15% vacinados") ~ "15% vaccinated",
                                 str_detect(data$vaccinated, "5% vacinados") ~ "5% vaccinated")
    
    
  } else if (country %in% c("CHL","COL","SP")) {
    
    # GDP
    data$gdp <- case_when(str_detect(data$gdp, "Caída del PIB del -10%") ~ "'-10% decrease GDP",
                          str_detect(data$gdp, "Caída del PIB del -5%") ~ "'-5% decrease GDP",
                          str_detect(data$gdp, "0% variación") ~ "0% change",
                          str_detect(data$gdp, "Aumento del PIB del 5%") ~ "5% increase GDP",
                          str_detect(data$gdp, "Aumento del PIB del 10%") ~ "10% increase GDP")
    
    data$jobs <- case_when(str_detect(data$jobs, "Caída del empleo del -10%") ~ "-10% decline jobs",
                           str_detect(data$jobs, "Caída del empleo del -5%") ~ "-5% decline jobs",
                           str_detect(data$jobs, "0% variación") ~ "0% change",
                           str_detect(data$jobs, "Aumento del empleo del 5%") ~ "5% increase jobs",
                           str_detect(data$jobs, "Aumento del empleo del 10%") ~ "10% increase jobs")
    
    data$supplies <- case_when(str_detect(data$supplies, "El gobierno rápidamente obtuvo todos los suministros necesarios de vacunas contra el COVID-19") ~ "Government quickly obtained all necessary COVID-19 Vaccine Supplies",
                               str_detect(data$supplies, "El gobierno fue lento al obtener los suministros necesarios de vacunas contra el COVID-19") ~ "Government was slow in obtaining necessary COVID-19 Vaccine Supplies")
    
    data$lockdown <- case_when(str_detect(data$lockdown, "10 semanas de cuarentena") ~ "10 weeks lockdown",
                               str_detect(data$lockdown, "20 semanas de cuarentena") ~ "20 weeks lockdown",
                               str_detect(data$lockdown, "30 semanas de cuarentena") ~ "30 weeks lockdown",
                               str_detect(data$lockdown, "40 semanas de cuarentena") ~ "40 weeks lockdown")
    
    data$deaths <- case_when(str_detect(data$deaths, "10 muertes por millón de personas") ~ "10 deaths per million people",
                             str_detect(data$deaths, "30 muertes por millón de personas") ~ "30 deaths per million people",
                             str_detect(data$deaths, "50 muertes por millón de personas") ~ "50 deaths per million people",
                             str_detect(data$deaths, "70 muertes por millón de personas") ~ "70 deaths per million people",
                             str_detect(data$deaths, "90 muertes por millón de personas") ~ "90 deaths per million people")
    
    data$vaccinated <- case_when(str_detect(data$vaccinated, "75% vacunada") ~ "75% vaccinated",
                                 str_detect(data$vaccinated, "50% vacunada") ~ "50% vaccinated",
                                 str_detect(data$vaccinated, "25% vacunada") ~ "25% vaccinated",
                                 str_detect(data$vaccinated, "15% vacunada") ~ "15% vaccinated",
                                 str_detect(data$vaccinated, "5% vacunada") ~ "5% vaccinated")
    
    
  } else if (country %in% c("FR","CAN")) {
    
    # In this branch every recode ends with `TRUE ~ <column>`, so text that
    # matches no pattern is kept as-is rather than turned into NA. The gdp and
    # jobs patterns contain "(" and "+", hence the literal coll() matching.
    # GDP
    data$gdp <- case_when(str_detect(data$gdp, coll("Un recul de -10% du PIB (produit intérieur brut)")) ~ "'-10% decrease GDP",
                          str_detect(data$gdp, coll("Un recul de -5% du PIB (produit intérieur brut)")) ~ "'-5% decrease GDP",
                          str_detect(data$gdp, coll("Croissance nulle (0%) du PIB")) ~ "0% change",
                          str_detect(data$gdp, coll("Une croissance de +5% du PIB (produit intérieur brut)")) ~ "5% increase GDP",
                          str_detect(data$gdp, coll("Une croissance de +10% du PIB (produit intérieur brut)")) ~ "10% increase GDP",
                          TRUE ~ data$gdp)
    
    data$jobs <- case_when(str_detect(data$jobs, coll("Un recul de l’emploi de -10%")) ~ "-10% decline jobs",
                           str_detect(data$jobs, coll("Un recul de l’emploi de -5%")) ~ "-5% decline jobs",
                           str_detect(data$jobs, coll("Croissance nulle (0%) de l’emploi")) ~ "0% change",
                           str_detect(data$jobs, coll("Une croissance de l’emploi de +5%")) ~ "5% increase jobs",
                           str_detect(data$jobs, coll("Une croissance de l’emploi de +10%")) ~ "10% increase jobs",
                           TRUE ~ data$jobs)
    
    data$supplies <- case_when(str_detect(data$supplies, "L’approvisionnement des réserves en vaccins COVID-19 a été rapidement fait par le gouvernement") ~ "Government quickly obtained all necessary COVID-19 Vaccine Supplies",
                               str_detect(data$supplies, "L’approvisionnement des réserves en vaccins COVID-19 a été lentement fait par le gouvernement") ~ "Government was slow in obtaining necessary COVID-19 Vaccine Supplies",
                               TRUE ~ data$supplies)
    
    data$lockdown <- case_when(str_detect(data$lockdown, "10 semaines du confinement en 2020") ~ "10 weeks lockdown",
                               str_detect(data$lockdown, "20 semaines du confinement en 2020") ~ "20 weeks lockdown",
                               str_detect(data$lockdown, "30 semaines du confinement en 2020") ~ "30 weeks lockdown",
                               str_detect(data$lockdown, "40 semaines du confinement en 2020") ~ "40 weeks lockdown",
                               TRUE ~ data$lockdown)
    
    data$deaths <- case_when(str_detect(data$deaths, "10 décès pour 1 millions d'habitants") ~ "10 deaths per million people",
                             str_detect(data$deaths, "30 décès pour 1 millions d'habitants") ~ "30 deaths per million people",
                             str_detect(data$deaths, "50 décès pour 1 millions d'habitants") ~ "50 deaths per million people",
                             str_detect(data$deaths, "70 décès pour 1 millions d'habitants") ~ "70 deaths per million people",
                             str_detect(data$deaths, "90 décès pour 1 millions d'habitants") ~ "90 deaths per million people",
                             TRUE ~ data$deaths)
    
    data$vaccinated <- case_when(str_detect(data$vaccinated, "15% vaccinés") ~ "15% vaccinated",
                                 str_detect(data$vaccinated, "25% vaccinés") ~ "25% vaccinated",
                                 str_detect(data$vaccinated, "50% vaccinés") ~ "50% vaccinated",
                                 str_detect(data$vaccinated, "75% vaccinés") ~ "75% vaccinated",
                                 str_detect(data$vaccinated, "5% vaccinés") ~ "5% vaccinated",
                                 TRUE ~ data$vaccinated)
    
    
  } else if (country == "IT") {
    
    # GDP
    data$gdp <- case_when(str_detect(data$gdp, "'-10% diminuzione del PIL") ~ "'-10% decrease GDP",
                          str_detect(data$gdp, "'-5% diminuzione del PIL") ~ "'-5% decrease GDP",
                          str_detect(data$gdp, "Variazione dello 0%") ~ "0% change",
                          str_detect(data$gdp, "5% di aumento del PIL") ~ "5% increase GDP",
                          str_detect(data$gdp, "10% di aumento del PIL") ~ "10% increase GDP")
    
    data$jobs <- case_when(str_detect(data$jobs, "-10% diminuzione posti di lavoro") ~ "-10% decline jobs",
                           str_detect(data$jobs, "-5% diminuzione posti di lavoro") ~ "-5% decline jobs",
                           str_detect(data$jobs, "Cambiamento dello 0%") ~ "0% change",
                           str_detect(data$jobs, "Aumento del 5% dei posti di lavoro") ~ "5% increase jobs",
                           str_detect(data$jobs, "Aumento del 10% dei posti di lavoro") ~ "10% increase jobs")
    
    data$supplies <- case_when(str_detect(data$supplies, "Il governo ha ottenuto rapidamente tutte le forniture di vaccino COVID-19 necessarie") ~ "Government quickly obtained all necessary COVID-19 Vaccine Supplies",
                               str_detect(data$supplies, "Il governo è stato lento nell'ottenere le necessarie forniture di vaccino COVID-19") ~ "Government was slow in obtaining necessary COVID-19 Vaccine Supplies")
    
    data$lockdown <- case_when(str_detect(data$lockdown, "10 settimane di lockdown") ~ "10 weeks lockdown",
                               str_detect(data$lockdown, "20 settimane di lockdown") ~ "20 weeks lockdown",
                               str_detect(data$lockdown, "30 settimane di lockdown") ~ "30 weeks lockdown",
                               str_detect(data$lockdown, "40 settimane di lockdown") ~ "40 weeks lockdown")
    
    data$deaths <- case_when(str_detect(data$deaths, "10 morti per milione di persone") ~ "10 deaths per million people",
                             str_detect(data$deaths, "30 morti per milione di persone") ~ "30 deaths per million people",
                             str_detect(data$deaths, "50 morti per milione di persone") ~ "50 deaths per million people",
                             str_detect(data$deaths, "70 morti per milione di persone") ~ "70 deaths per million people",
                             str_detect(data$deaths, "90 morti per milione di persone") ~ "90 deaths per million people")
    
    data$vaccinated <- case_when(str_detect(data$vaccinated, "75% vaccinati") ~ "75% vaccinated",
                                 str_detect(data$vaccinated, "50% vaccinati") ~ "50% vaccinated",
                                 str_detect(data$vaccinated, "25% vaccinati") ~ "25% vaccinated",
                                 str_detect(data$vaccinated, "15% vaccinati") ~ "15% vaccinated",
                                 str_detect(data$vaccinated, "5% vaccinati") ~ "5% vaccinated")
    
  } else if (country == "JPN") {
    
    # Several patterns in this branch start with a space and most use the
    # full-width percent sign; they only match text spelled exactly that way.
    # GDP
    data$gdp <- case_when(str_detect(data$gdp, "'-GDP10％減") ~ "'-10% decrease GDP",
                          str_detect(data$gdp, "-GDP5％減") ~ "'-5% decrease GDP",
                          str_detect(data$gdp, "0％変動") ~ "0% change",
                          str_detect(data$gdp, "GDP5％増") ~ "5% increase GDP",
                          str_detect(data$gdp, "GDP10％増") ~ "10% increase GDP")
    
    data$jobs <- case_when(str_detect(data$jobs, " -雇用の10％減少") ~ "-10% decline jobs",
                           str_detect(data$jobs, " -雇用の5％減") ~ "-5% decline jobs",
                           str_detect(data$jobs, " 0％変動") ~ "0% change",
                           str_detect(data$jobs, " 雇用の5％増加") ~ "5% increase jobs",
                           str_detect(data$jobs, " 雇用の10％増加") ~ "10% increase jobs")
    
    data$supplies <- case_when(str_detect(data$supplies, "政府はCOVID-19ワクチンに必要な物資を迅速に入手した。") ~ "Government quickly obtained all necessary COVID-19 Vaccine Supplies",
                               str_detect(data$supplies, " 必要なCOVID-19ワクチンの調達が遅れている") ~ "Government was slow in obtaining necessary COVID-19 Vaccine Supplies")
    
    data$lockdown <- case_when(str_detect(data$lockdown, "10週間のロックダウン") ~ "10 weeks lockdown",
                               str_detect(data$lockdown, " 20週間のロックダウン") ~ "20 weeks lockdown",
                               str_detect(data$lockdown, " 30週間のロックダウン") ~ "30 weeks lockdown",
                               str_detect(data$lockdown, " 40週間のロックダウン") ~ "40 weeks lockdown")
    
    data$deaths <- case_when(str_detect(data$deaths, "100万人あたり10人の死者") ~ "10 deaths per million people",
                             str_detect(data$deaths, " 100万人あたり30人の死者") ~ "30 deaths per million people",
                             str_detect(data$deaths, " 100万人あたり50人の死者") ~ "50 deaths per million people",
                             str_detect(data$deaths, " 100万人当たりの死亡者数70人") ~ "70 deaths per million people",
                             str_detect(data$deaths, " 100万人当たりの死亡者数90人") ~ "90 deaths per million people")
    
    data$vaccinated <- case_when(str_detect(data$vaccinated, " ワクチン接種率75") ~ "75% vaccinated",
                                 str_detect(data$vaccinated, " ワクチン接種率50") ~ "50% vaccinated",
                                 str_detect(data$vaccinated, " 25% ワクチン接種済み") ~ "25% vaccinated",
                                 str_detect(data$vaccinated, " 15％接種") ~ "15% vaccinated",
                                 str_detect(data$vaccinated, "5％接種") ~ "5% vaccinated")
    
  } else if (country == "UGA") {
    
    # Only gdp and jobs are recoded for this country. The source wording is
    # already English; the patterns carry no minus sign. gdp keeps unmatched
    # text via `TRUE ~`, whereas jobs has no fallback, so unmatched job values
    # become NA.
    data$gdp <- case_when(str_detect(data$gdp, "10% decrease in GDP") ~ "'-10% decrease GDP",
                          str_detect(data$gdp, "5% decrease in GDP") ~ "'-5% decrease GDP",
                          str_detect(data$gdp, "0% change  in GDP") ~ "0% change",
                          str_detect(data$gdp, "5% increase in GDP") ~ "5% increase GDP",
                          str_detect(data$gdp, "10% increase in GDP") ~ "10% increase GDP",
                          TRUE ~ data$gdp)
    
    data$jobs <- case_when(str_detect(data$jobs, "10% decline of jobs") ~ "-10% decline jobs",
                           str_detect(data$jobs, "5% decline of jobs") ~ "-5% decline jobs",
                           str_detect(data$jobs, "0% change of jobs") ~ "0% change",
                           str_detect(data$jobs, "5% increase of jobs") ~ "5% increase jobs",
                           str_detect(data$jobs, "10% increase of jobs") ~ "10% increase jobs")
    
  }
  
  # Applied for every country: 1 when the answer contains one of the listed
  # "yes" words (case-sensitive; the Portuguese alternative includes a
  # trailing comma), 0 otherwise. NA answers stay NA.
  data$choice_bin <- ifelse(str_detect(data$choice_bin,"YES|SÌ|SÍ|OUI|是|SIM,|はい"),1,0)
  
  return(data)
}

# Build the analysis-ready conjoint data set for a set of countries.
#
# Reads "survey_data/data_<country>.csv" for each entry of `countries`,
# reshapes it with convert_conjoint(), translates it with
# translate_conjoint(), stacks the countries, shortens the attribute labels
# and finally moves the requested reference level of each attribute first.
#
# Arguments:
#   countries  - country codes; used both in the file names and as the
#                `country` column of the result.
#   ref_levels - named character vector, attribute name -> reference level.
#
# Returns the stacked data frame with factor attribute columns.
create_conjoint_data <- function(countries, ref_levels) {

  # Load every export first, then reshape each one to long format.
  raw_surveys <- lapply(countries, function(code) {
    read_csv(paste0("survey_data/data_", code, ".csv"))
  })
  per_country <- setNames(lapply(raw_surveys, convert_conjoint), countries)

  # Translate country by country (printing the code as progress output).
  translated <- lapply(countries, function(code) {
    print(code)
    english <- translate_conjoint(per_country[[code]], code[1])
    mutate(english, country = code)
  })

  combined_data <- do.call("rbind", translated)
  combined_data <- mutate(combined_data, country = as.factor(country))

  pct_levels <- c("-10%", "-5%", "0%", "+5%", "+10%")

  # Collapse the English labels to short codes. Within each case_when() the
  # first matching rule wins, so the rule order is kept as it was.
  format_data <- mutate(
    combined_data,
    gdp = factor(
      case_when(grepl("5% increase", gdp) ~ "+5%",
                grepl("10% increase", gdp) ~ "+10%",
                grepl("5% decrease", gdp) ~ "-5%",
                grepl("10% decrease", gdp) ~ "-10%",
                grepl("0% change", gdp) ~ "0%"),
      levels = pct_levels
    ),
    jobs = factor(
      case_when(grepl("5% increase", jobs) ~ "+5%",
                grepl("10% increase", jobs) ~ "+10%",
                grepl("5% decline", jobs) ~ "-5%",
                grepl("10% decline", jobs) ~ "-10%",
                grepl("0% change", jobs) ~ "0%"),
      levels = pct_levels
    ),
    supplies = case_when(grepl("quickly", supplies) ~ "Quick to procure",
                         grepl("slow", supplies) ~ "Slow to procure"),
    lockdown = case_when(grepl("10", lockdown) ~ "10 weeks",
                         grepl("20", lockdown) ~ "20 weeks",
                         grepl("30", lockdown) ~ "30 weeks",
                         grepl("40", lockdown) ~ "40 weeks"),
    deaths = case_when(grepl("10", deaths) ~ "10 per million",
                       grepl("30", deaths) ~ "30 per million",
                       grepl("50", deaths) ~ "50 per million",
                       grepl("70", deaths) ~ "70 per million",
                       grepl("90", deaths) ~ "90 per million"),
    vaccinated = factor(
      case_when(grepl("15% vaccinated", vaccinated) ~ "15%",
                grepl("25% vaccinated", vaccinated) ~ "25%",
                grepl("50% vaccinated", vaccinated) ~ "50%",
                grepl("75% vaccinated", vaccinated) ~ "75%",
                grepl("5% vaccinated", vaccinated) ~ "5%"),
      levels = c("5%", "15%", "25%", "50%", "75%")
    )
  )

  # Put the chosen reference level first for every attribute in `ref_levels`.
  for (att in names(ref_levels)) {
    as_categories <- as.factor(format_data[[att]])
    format_data[[att]] <- relevel(as_categories, ref = ref_levels[att])
  }

  format_data
}

# Fit the linear conjoint model: outcome regressed on the six attributes,
# optional controls, and optional country / round terms.
#
# Arguments:
#   data       - data frame with the outcome, the attribute columns (gdp,
#                jobs, supplies, lockdown, deaths, vaccinated) and any
#                controls / `country` / `round` / `weights` columns used.
#   controls   - character vector of extra right-hand-side terms, or NULL.
#                An empty vector is treated like NULL.
#   country_fe - add `country` to the formula?
#   round_fe   - add `round` to the formula?
#   y          - name of the outcome column.
#   cluster    - not used by this function; kept so existing calls still work.
#   weights    - when TRUE, fit with the `weights` column of `data`.
#
# Returns the fitted `lm` object.
conjoint_lm <- function(data, controls = NULL, country_fe = TRUE, round_fe = TRUE, y = "choice_bin",
                        cluster = NULL, weights = FALSE) {

  rhs_terms <- c("gdp", "jobs", "supplies", "lockdown", "deaths", "vaccinated")

  # length() covers both NULL and character(0); an empty vector previously
  # left a dangling " + " in the formula string.
  if (length(controls) > 0) {
    rhs_terms <- c(rhs_terms, controls)
  }

  if (country_fe) {
    rhs_terms <- c(rhs_terms, "country")
  }

  if (round_fe) {
    rhs_terms <- c(rhs_terms, "round")
  }

  fmla <- as.formula(paste0(y, " ~ ", paste(rhs_terms, collapse = " + ")))

  if (weights) {
    # `[[` matches the column name exactly (`$` can partially match on a
    # data.frame). A missing column used to be passed to lm() as NULL, which
    # silently produced an unweighted fit; make that visible.
    if (is.null(data[["weights"]])) {
      warning("`weights = TRUE` but `data` has no `weights` column; ",
              "fitting an unweighted model.", call. = FALSE)
      return(lm(fmla, data))
    }
    return(lm(fmla, data, weights = data[["weights"]]))
  }

  lm(fmla, data)
}

# Turn a fitted conjoint model into a tidy coefficient table for plotting.
#
# Arguments:
#   mod        - fitted model whose coefficient table has the columns
#                Estimate, Std. Error, t value and Pr(>|t|).
#   ref_levels - named character vector, attribute name -> reference level;
#                one zero-estimate row is appended per entry.
#   label      - value stored in the `model` column of the result.
#   cluster    - name(s) of the clustering variable(s) passed to vcovCL via
#                coeftest, or NA / NULL to use the model's own standard
#                errors.
#
# Returns a data frame with one row per attribute level (including the
# reference levels) and the columns est, std_error, t, p, lower95, upper95,
# att_full, model, att, coef and non_ref. Intercept, controls and any other
# term that is not "<attribute><known level>" are dropped.
format_model <- function(mod, ref_levels, label, cluster = NA) {

  att_labels <- c(gdp = "GDP growth",
                  jobs = "Job growth",
                  supplies = "Vaccine procurement",
                  lockdown = "Lockdown length",
                  deaths = "Deaths",
                  vaccinated = "Vaccination rate")

  coef_levels <- c("-10%","-5%","0%","5%","+5%","+10%","15%","25%","50%","75%",
                   "10 weeks","20 weeks","30 weeks", "40 weeks",
                   "10 per million","30 per million","50 per million","70 per million","90 per million",
                   "Quick to procure","Slow to procure")

  # Accept NULL as well as NA for "no clustering": `!is.na(NULL)` has length
  # zero and made the original `if` fail.
  use_cluster <- length(cluster) > 0 && !anyNA(cluster)

  if (use_cluster) {
    cluster_fmla <- as.formula(paste0("~", paste(cluster, collapse = " + ")))
    # `[, , drop = FALSE]` strips the coeftest class and keeps a matrix even
    # when the model has a single coefficient.
    mod_table <- coeftest(mod, vcov = vcovCL, cluster = cluster_fmla)[, , drop = FALSE]
  } else {
    mod_table <- summary(mod)$coefficients
  }

  mod_table <- as.data.frame(mod_table) %>%
    rename(est = Estimate,
           std_error = `Std. Error`,
           t = `t value`,
           p = `Pr(>|t|)`) %>%
    mutate(lower95 = est - 1.96*std_error,
           upper95 = est + 1.96*std_error,
           att_full = rownames(.))

  # Reference levels have no coefficient; add them as zero-estimate rows
  # (std_error stays NA, which is what marks them as reference rows below).
  for (att in names(ref_levels)) {
    mod_table <- add_row(mod_table, est = 0, lower95 = NA, upper95 = NA,
                         att_full = paste0(att, ref_levels[[att]]))
  }

  # A term counts as an attribute level only when it is an attribute name
  # followed by one of the known levels. Matching on the prefix alone also
  # caught control terms such as "deaths_w2" and kept them as "Deaths" rows
  # with an NA coefficient label.
  att_key <- rep(NA_character_, nrow(mod_table))
  coef_label <- rep(NA_character_, nrow(mod_table))
  for (prefix in names(att_labels)) {
    remainder <- substring(mod_table$att_full, nchar(prefix) + 1)
    is_level <- is.na(att_key) &
      startsWith(mod_table$att_full, prefix) &
      remainder %in% coef_levels
    att_key[is_level] <- prefix
    coef_label[is_level] <- remainder[is_level]
  }

  mod_table <- mod_table %>%
    mutate(model = label,
           att = unname(att_labels[att_key]),
           coef = coef_label) %>%
    filter(att_full != "(Intercept)", !is.na(att)) %>%
    mutate(non_ref = as.factor(ifelse(is.na(std_error), 0, 1)),
           att = as.factor(att)) %>%
    mutate(coef = factor(coef, levels = coef_levels))

  return(mod_table)

}

# Draw one coefficient panel per region and arrange the panels side by side.
#
# Arguments:
#   data - coefficient table with the columns region, model_lab, est,
#          std_error, lower95, upper95, coef and att.
#
# Each region gets its own consecutive slice of the polychrome() palette, so
# the models are coloured distinctly across panels. Only the first panel shows
# the y-axis labels and only the last one keeps the row strip.
#
# Returns the combined plot produced by plot_grid().
country_plot <- function(data) {

  region_data <- split(data,
                       data$region)

  # Common x-axis range so the panels are directly comparable.
  fig_limits <- data %>%
    mutate(x_low = est - 1.96*std_error,
           x_high = est + 1.96*std_error)

  fig_x_low <- min(fig_limits$x_low, na.rm = TRUE) - 0.01
  fig_x_high <- max(fig_limits$x_high, na.rm = TRUE) + 0.01

  n_regions <- length(region_data)
  region_plots <- vector("list", n_regions)

  # Take colours from the full default palette. The previous call,
  # polychrome(length(unique(n_colors))), always asked for a one-colour
  # palette because n_colors was already a single number, so every index
  # beyond the first came back NA.
  model_palette <- as.vector(polychrome())

  color_start <- 1
  for (i in seq_along(region_data)) {
    n_models <- length(unique(region_data[[i]]$model_lab))
    # seq(..., length.out =) stays empty for a region without models, where
    # start:end would have counted backwards.
    color_idx <- seq(color_start, length.out = n_models)

    panel_theme <- if (i == 1) {

      theme(axis.text.y = element_text(size = 13),
            strip.background.y = element_blank(),
            strip.text.y = element_blank())

    } else if (i != n_regions) {

      theme(axis.text.y = element_blank(),
            axis.ticks.y = element_blank(),
            strip.background.y = element_blank(),
            strip.text.y = element_blank(),
            plot.margin=margin(l=-0.4,unit="cm"))

    } else {

      theme(axis.text.y = element_blank(),
            axis.ticks.y = element_blank(),
            plot.margin=margin(l=-0.4,unit="cm"))

    }

    # NOTE(review): levels("term") is NULL for a plain string, so `limits` is
    # NULL and the y scale keeps its default order. Left unchanged; confirm
    # whether a reversed level order was intended.
    region_plots[[i]] <- region_data[[i]] %>%
      ggplot(aes(x = est, y = coef, xmin = lower95, xmax = upper95)) +
      geom_point(position = position_dodge(width = 1), size = 2, alpha = 0.7) +
      geom_errorbarh(height = 0, position = position_dodge(width = 1), size = 0.8, alpha = 0.7) +
      geom_vline(xintercept = 0, linetype = "dashed") +
      facet_grid(att~region, space = "free", scales = "free_y") +
      aes(color = model_lab) +
      scale_color_manual(values = model_palette[color_idx]) +
      labs(x = "AMCE", y = "", color = "") +
      scale_y_discrete(limits = rev(levels("term"))) +
      guides(color=guide_legend(ncol = 1, byrow = TRUE)) +
      xlim(fig_x_low, fig_x_high) +
      theme(legend.position = "bottom",
            text = element_text(size = 12),
            legend.text = element_text(size = 13),
            strip.text = element_text(size = 13),
            plot.margin = unit(c(0, 0, 0, 0), "cm")) +
      panel_theme

    color_start <- color_start + n_models
  }

  # The first panel carries the y-axis labels and is therefore wider; every
  # other panel gets unit width (identical to c(2.85,1,1,1) for four regions).
  plot_grid(plotlist = region_plots, nrow = 1,
            align = "h",
            rel_widths = c(2.85, rep(1, max(n_regions - 1, 0))))
}


# Map a vector of variable names onto their display labels. Names that are
# not in the table (including NA) come back as NA.
var_format <- function(x) {

  display_labels <- c(
    "age" = "Age",
    "cases_w2" = "No. of Covid-19 cases (region)",
    "country" = "Country",
    "covidexp_index" = "Covid-19 exposure (index)",
    "covidexp_index_abovemed" = "High Covid-19 exposure",
    "deaths_w2" = "Covid-19 deaths",
    "EDUCATION_LEVEL" = "Education level",
    "eq5d_rate_delta" = "Change in EQ5D",
    "eq5d_rate_now" = "EQ5D score",
    "food_pca" = "Food poverty (PCA)",
    "gender" = "Gender",
    "hh_inc_delta" = "Change in household income",
    "hh_inc_obj" = "Objective household income",
    "ideology" = "Ideology",
    "income_abovemed" = "High income",
    "pop2019_w2" = "2019 population",
    "round" = "Round",
    "vac_hes_4_1" = "Covid-19 exposure (retrospective)",
    "vac_hes_7" = "Covid-19 exposure (prospective)",
    "who_pca" = "Health score (PCA)",
    "dep_children" = "Has children dependents",
    "marital_status" = "Marital status",
    "REGION_0" = "Subnational region",
    "subj_vaccinated" = "Vaccinated?",
    "subj_vacc_refused" = "Refused vaccination?",
    "C1" = "OxCGRT: School closures (in days)",
    "C2" = "OxCGRT: Workplace closures (in days)",
    "C3" = "OxCGRT: Public event cancellations (in days)",
    "C4" = "OxCGRT: Gathering limits (in days)",
    "C5" = "OxCGRT: Transport closures (in days)",
    "C6" = "OxCGRT: Shelter-in-place (in days)",
    "C7" = "OxCGRT: Movement restrictions (in days)",
    "C8" = "OxCGRT: International travel restrictions (in days)",
    "E1" = "OxCGRT: Income support (in days)",
    "E2" = "OxCGRT: Debt/contract relief (in days)",
    "gov_relect" = "Intend to vote for incumbent",
    "gov_rate" = "Incumbent government rating",
    "C_pca" = "Containment policies (PCA)",
    "E_pca" = "Economic support policies (PCA)",
    "health_spend_pca" = "Support for pandemic research spending (PCA)",
    "health_compl_pca" = "Health measure compliance (PCA)",
    "intl_flights" = "No. of international flights",
    "POLCONV_VDEM" = "Political constraint",
    "system" = "Regime type"
  )

  # match() returns NA for names missing from the table, which indexes to NA.
  unname(display_labels[match(x, names(display_labels))])

}

# Assign each raw variable name to its thematic group.
#
# `x` is a vector of variable names; the result is a character vector of the
# same length. Names that are neither listed below nor start with an OxCGRT
# indicator code (C or E followed by a digit 1-8) come back as NA.
var_group <- function(x) {

  var_names <- as.character(x)

  # Group label -> variables belonging to that group.
  group_members <- list(
    "Demographics" = c("age", "EDUCATION_LEVEL", "gender", "dep_children",
                       "marital_status"),
    "(Sub-) national context" = c("cases_w2", "country", "covidexp_index",
                                  "covidexp_index_abovemed", "deaths_w2",
                                  "pop2019_w2", "REGION_0", "C_pca", "E_pca",
                                  "POLCONV_VDEM", "system"),
    "Health" = c("eq5d_rate_delta", "eq5d_rate_now", "vac_hes_4_1",
                 "vac_hes_7", "who_pca", "subj_vaccinated",
                 "subj_vacc_refused"),
    "Economics" = c("food_pca", "hh_inc_delta", "hh_inc_obj",
                    "income_abovemed"),
    "Politics" = c("ideology", "gov_relect", "gov_rate"),
    "Design" = c("round"),
    "Engagement" = c("health_spend_pca", "health_compl_pca", "intl_flights")
  )

  # Invert the table into a variable -> group lookup vector.
  group_of <- rep(names(group_members), lengths(group_members))
  names(group_of) <- unlist(group_members, use.names = FALSE)

  groups <- unname(group_of[var_names])

  # OxCGRT indicators are recognised by prefix instead of being listed.
  is_oxcgrt <- is.na(groups) & grepl("^(C|E)[1-8]", var_names)
  groups[is_oxcgrt] <- "(Sub-) national context"

  groups

}


# Z statistics for the difference between matching coefficients of two models.
#
# For each selected row of the two coefficient tables this computes
#   (estimate_1 - estimate_2) / sqrt(se_1^2 + se_2^2),
# i.e. the two estimates are combined without a covariance term.
#
# Arguments:
#   mod1, mod2  fitted models whose summary() has a `coefficients` matrix with
#               estimates in column 1 and standard errors in column 2 (as for
#               lm summaries).
#   rows        rows of the coefficient table to compare. Defaults to 2:6,
#               the rows that were previously hard-coded.
#
# Returns a numeric vector with one Z statistic per selected row. Rows are
# paired by position; coefficient names are not checked.
wald_test <- function(mod1, mod2, rows = 2:6) {

  # drop = FALSE keeps a matrix even when a single row is requested.
  c_mod1 <- summary(mod1)$coefficients[rows, , drop = FALSE]
  c_mod2 <- summary(mod2)$coefficients[rows, , drop = FALSE]

  (c_mod1[, 1] - c_mod2[, 1]) / sqrt(c_mod1[, 2]^2 + c_mod2[, 2]^2)

}

# Z statistics for the difference in attribute coefficients between two models.
#
# Arguments:
#   models  list whose first two elements are fitted models; summary() of each
#           must have a `coefficients` matrix with estimates in column 1 and
#           standard errors in column 2.
#
# Only coefficients whose name contains one of gdp, jobs, supplies, lockdown,
# deaths or vaccinated are compared. Both models must yield exactly the same
# coefficient names in the same order, otherwise the function stops.
#
# Returns a data frame with one row per coefficient and the columns
#   coef     coefficient name
#   diff     estimate in model 1 minus estimate in model 2
#   diff_se  sqrt(se_1^2 + se_2^2)
#   Z        diff / diff_se
wald_test_submodel <- function(models) {

  attribute_pattern <- "gdp|jobs|supplies|lockdown|deaths|vaccinated"

  # drop = FALSE keeps a matrix (with row names) even if only one coefficient
  # matches the pattern.
  attribute_coefs <- function(mod) {
    coefs <- summary(mod)$coefficients
    coefs[grepl(attribute_pattern, rownames(coefs)), , drop = FALSE]
  }

  c_mod1 <- attribute_coefs(models[[1]])
  c_mod2 <- attribute_coefs(models[[2]])

  # identical() also rejects tables of different length; an element-wise `==`
  # would recycle the shorter set of names and could let a mismatch through.
  if (!identical(rownames(c_mod1), rownames(c_mod2))) {
    stop("incompatible models, please check")
  }

  coef_names <- rownames(c_mod1)

  diff <- c_mod1[, 1] - c_mod2[, 1]
  diff_se <- sqrt(c_mod1[, 2]^2 + c_mod2[, 2]^2)
  # Name the vectors explicitly so the data frame rows are labelled by
  # coefficient in the single-coefficient case as well.
  names(diff) <- coef_names
  names(diff_se) <- coef_names

  data.frame(coef = coef_names, diff = diff, diff_se = diff_se, Z = diff / diff_se)

}

# Fit the incumbent-approval regressions separately for each level of a
# subsetting variable and write them to one LaTeX table.
#
# Three linear models of `reelect` (each with `country` as a covariate) are
# fitted per level: on the `politics_*` ratings ("Domestic"), on the
# `home_bias_*` ratings ("Net"), and on the `politics_*` plus `other_*`
# ratings ("Domestic + Other"). Rating columns are divided by 10 before
# fitting. The table is written to
# tables/incumb_approval_regs_<SUBSET>.tex, where <SUBSET> is `subset` in
# upper case.
#
# Arguments:
#   data             data frame with `reelect`, `country`, the rating columns
#                    used in the formulas below, and the column named by
#                    `subset`.
#   subset           name (string) of the column to split the sample on; rows
#                    where it is NA are dropped.
#   subset_label     name of the table row that reports each model's level.
#   subset_vals      labels for the levels, given in the order of the groups in
#                    the nested data (compare with the printed level check).
#                    If NULL and `split_at_median` is FALSE, the observed
#                    levels are used. If NULL and `split_at_median` is TRUE, no
#                    level row is added to the table.
#   split_at_median  if TRUE, recode the subset column to "Low" (at or below
#                    its median) and "High" before splitting.
#
# Prints a check table pairing the fitted groups with `subset_vals`.
approval_subset <- function(data, subset, subset_label, subset_vals = NULL, split_at_median = FALSE) {

  data_s <- data %>%
    mutate(subset_var = data[[subset]]) %>%
    filter(!is.na(subset_var))

  if (split_at_median) {
    data_s$subset_var <- ifelse(data_s$subset_var <= median(data_s$subset_var), "Low", "High")
  }

  data_nest <- data_s %>%
    group_by(subset_var) %>%
    nest()

  if (!split_at_median && is.null(subset_vals)) {
    subset_vals <- data_nest$subset_var
  }

  # Divide every column starting with one of `prefixes` by 10.
  # starts_with() matches literal prefixes (not regular expressions), so
  # several prefixes have to be supplied as a character vector.
  rescale_ratings <- function(d, prefixes) {
    mutate(d, across(starts_with(prefixes), function(x) x / 10))
  }

  incumb_dom <- data_nest %>%
    mutate(regs = map(data, function(d) lm(
      reelect ~ politics_1 + politics_3 + politics_5 + politics_7 + politics_9 + country,
      data = rescale_ratings(d, "politics_")
    )))

  incumb_net <- data_nest %>%
    mutate(regs = map(data, function(d) lm(
      reelect ~ home_bias_1 + home_bias_2 + home_bias_3 + home_bias_4 + home_bias_5 + country,
      data = rescale_ratings(d, "home_bias_")
    )))

  incumb_both <- data_nest %>%
    mutate(regs = map(data, function(d) lm(
      reelect ~ politics_1 + politics_3 + politics_5 + politics_7 + politics_9 +
        other_1 + other_3 + other_5 + other_7 + other_9 + country,
      data = rescale_ratings(d, c("other_", "politics_"))
    )))

  n_groups <- length(incumb_dom$subset_var)
  n_mods <- 3 * n_groups
  group_idx <- seq_len(n_groups)

  custom_gof_rows <- list()
  custom_gof_rows[[subset_label]] <- rep(subset_vals, 3)
  custom_gof_rows[["Subject controls?"]] <- rep("No", n_mods)
  custom_gof_rows[["Country FE?"]] <- rep("Yes", n_mods)

  texreg::texreg(c(incumb_dom$regs, incumb_net$regs, incumb_both$regs),
                 table = FALSE, tabular = TRUE,
                 custom.coef.map = list("politics_1" = "Handling of Covid-19 pandemic",
                                        "politics_3" = "Handling of lockdown policy",
                                        "politics_5" = "Handling of Covid-19 vaccine campaign",
                                        "politics_7" = "Handling of economic policies",
                                        "politics_9" = "Handling of Covid-19 related deaths",
                                        "home_bias_1" = "Handling of Covid-19 pandemic",
                                        "home_bias_2" = "Handling of lockdown policy",
                                        "home_bias_3" = "Handling of Covid-19 vaccine campaign",
                                        "home_bias_4" = "Handling of economic policies",
                                        "home_bias_5" = "Handling of Covid-19 related deaths",
                                        "other_1" = "Covid-19 pandemic (other governments)",
                                        "other_3" = "Lockdown policy (other governments)",
                                        "other_5" = "Covid-19 vaccine campaign (other governments)",
                                        "other_7" = "Economic policies (other governments)",
                                        "other_9" = "Covid-19 related deaths (other governments)"),
                 digits = 3,
                 # Each header spans one consecutive run of n_groups columns.
                 custom.header = list(
                   "\\textit{Domestic}" = group_idx,
                   "\\textit{Net}" = group_idx + n_groups,
                   "\\textit{Domestic + Other}" = group_idx + 2 * n_groups
                 ),
                 custom.model.names = paste0("(", seq_len(n_mods), ")"),
                 custom.gof.rows = custom_gof_rows,
                 booktabs = TRUE,
                 use.packages = FALSE,
                 include.rsquared = FALSE,
                 file = paste0("tables/incumb_approval_regs_", str_to_upper(subset), ".tex"))

  # Lets the caller verify that the supplied labels line up with the groups
  # in the order they were fitted.
  print("Level check:")
  print(data.frame(mods = incumb_dom$subset_var,
                   labels = subset_vals))


}